# Load phyloseq objects
load("data/STJ2015_100_sym.RData"); phy100.f <- phy.f
load("data/STJ2015_sym.RData"); phy97.f <- phy.f
# Filter OTUs by minimum count
# Set threshold count
n <- 10
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Filter samples by minimum count
# Set threshold number of reads
sn <- 100
# Remove samples with fewer reads than threshold
phy97.f <- prune_samples(sample_sums(phy97.f)>=sn, phy97.f)
phy100.f <- prune_samples(sample_sums(phy100.f)>=sn, phy100.f)
# Filter OTUs by minimum count again in case any dropped below threshold after filtering samples
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Label clades and subtypes for filtered phyloseq object tax_tables
get.st <- function(df) {
within(df, {
Clade <- substr(hit, 1, 1)
Subtype <- gsub(hit, pattern="_[A-Z]{2}[0-9]{6}", replacement="")
Subtype <- gsub(Subtype, pattern="_multiple", replacement="")
Subtype2 <- ifelse(as.numeric(sim)==100, paste0("'", Subtype, "'"),
paste0("'[", rep(rle(sort(Subtype))$values, times=rle(sort(Subtype))$lengths), "]'^",
unlist(lapply(rle(sort(Subtype))$lengths, seq_len)))[order(order(Subtype))])
#Subtype <- ifelse(as.numeric(sim)==100, Subtype, paste("*", Subtype, sep=""))
})
}
tax_table(phy97.f) <- as.matrix(get.st(data.frame(tax_table(phy97.f), stringsAsFactors=FALSE)))
tax_table(phy100.f) <- as.matrix(get.st(data.frame(tax_table(phy100.f), stringsAsFactors=FALSE)))
# Compute summary statistics
stats97.f <- data.frame(`97% OTUs`=t(phystats(phy97.f)), check.names=F)
stats100.f <- data.frame(`100% OTUs`=t(phystats(phy100.f)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97.f)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100.f)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97.f)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100.f)), plot=F)
par(mfrow=c(2, 4), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
# Create stats table
knitr::kable(cbind(stats97.f, stats100.f))
| 97% OTUs | 100% OTUs | |
|---|---|---|
| Total count in OTU table | 5552773 | 5008668 |
| Number of OTUs | 163 | 10314 |
| Range of OTU counts | 10 - 1718387 | 10 - 1216927 |
| Number of singleton OTUs | 0 | 0 |
| Number of samples | 171 | 171 |
| Range of reads per sample | 111 - 133007 | 101 - 124715 |
| Arithmetic mean (± SD) reads per sample | 32472 ± 29196 | 29290 ± 26840 |
| Geometric mean (*/ GSD) reads per sample | 15959 */ 4 | 14027 */ 4 |
Count data are transformed to both relative abundance (proportions) and square-root proportions for downstream statistical analyses.
# Convert to proportion (relative abundance)
phy97.f.p <- transform_sample_counts(phy97.f, function(x) x/sum(x))
phy100.f.p <- transform_sample_counts(phy100.f, function(x) x/sum(x))
# Apply transformation function
transform <- function(x) sqrt(x/sum(x)) # Set transformation function
phy97.f.t <- transform_sample_counts(phy97.f, transform) # Transform data
phy100.f.t <- transform_sample_counts(phy100.f, transform)
# Clade overview
cladeAbund <- aggregate(data.frame(RelAbund=rowSums(otu_table(phy97.f.p))),
by=list(Clade=data.frame(tax_table(phy97.f.p))$Clade), FUN=sum)
cladeAbund$Prop <- prop.table(cladeAbund$RelAbund)
bars <- barplot(cladeAbund$Prop*100, col=taxcolors, space=0,
names.arg=cladeAbund$Clade, xlab=expression(paste(italic('Symbiodinium'), " Clade")),
ylab="Relative abundance (%)")
text(bars, cladeAbund$Prop*100+2, labels=round(cladeAbund$Prop*100, 1), xpd=T)
cladeAbund$Notus <- table(data.frame(tax_table(phy97.f.p))$Clade)
cladeAbund
## Clade RelAbund Prop Notus
## 1 A 20.1515170975 0.117845129225 64
## 2 B 78.4566683456 0.458810925998 53
## 3 C 68.8767837335 0.402788208968 22
## 4 D 3.1757865659 0.018571851263 7
## 5 F 0.0524474215 0.000306710067 5
## 6 G 0.2348503500 0.001373393860 5
## 7 I 0.0519464860 0.000303780620 7
# Are Clade I's real?
CladeI <- subset_taxa(phy97.f.p, Clade=="I")
#tax_table(CladeI)
# Clades by sample
source("~/Documents/Academia/HIMB/USVI/STJ2012/R/functions.R")
composition <- function(phy, col, legend=T) {
samdat <- data.frame(sample_data(phy))
#samdat$Genus <- factor(samdat$Genus, levels=rev(levels(samdat$Genus)))
samdat$Species <- factor(samdat$Species, levels=rev(levels(samdat$Species)))
samdat$Site <- factor(samdat$Location, levels=rev(levels(samdat$Location)))
samdat <- samdat[with(samdat, order(Species, Site)), ]
typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$hit),
rownames(samdat)])
sitebreaks <- c(as.character(samdat$Site), "X")==c("X", as.character(samdat$Site))
sitebreaks <- which(sitebreaks==F) - 1
spbreaks <- c(which(duplicated(samdat$Species)==F) - 1, nrow(samdat))
# Make Barplot
barplot(typerelabund, horiz=T, space=0, axes=F,axisnames=F, yaxs="i", col=col)
rect(0, 0, par("usr")[2], par("usr")[4], lwd=1, xpd=T)
axis(side=1, at=seq(0, 1, 0.1), line=0, tck=-0.025, mgp=c(0,0.25,0), cex.axis=0.7)
mtext(side=1, "Relative abundance", cex=0.7, line=1)
# Add legend
if (legend==T) {
legend(x=par("usr")[2]/2, y=par("usr")[4], xjust=0.5, yjust=0.25, horiz=T, bty="n", xpd=T,
cex=0.7, legend=c("A", "B", "C", "D", "F", "G"), fill=taxcolors, x.intersp=0.5)
legend(x=par("usr")[2]*1.1, y=par("usr")[4]*0.75, xjust=0, yjust=0.1, bty="n", xpd=T, cex=1,
pt.cex=1, legend=c("White Point", "West Tektite", "Cocoloba", "Cabritte Horn", "Booby Rock"), fill=c("red", "orange", "yellow", "green", "blue"), y.intersp=0.7,
x.intersp=0.3)
}
# Add grouping bars for Site
sitecolors <- matrix(c("red", "orange", "yellow", "green", "blue"),
dimnames=list(c("White Point", "West Tektite", "Cocoloba", "Cabritte Horn", "Booby Rock")))
for (i in 1:length(sitebreaks)) {
lines(c(0, 1), c(sitebreaks[i], sitebreaks[i]), lty=2, lwd=0.25)
rect(1.01, sitebreaks[i], 1.04, sitebreaks[i+1], col=sitecolors[samdat$Site[sitebreaks[i]+1],],
lwd=0.25, xpd=T)
}
# Add lines to separate species and species names
for (i in 1:length(spbreaks)) {
lines(c(0, 1.07), c(spbreaks[i], spbreaks[i]), xpd=T, type="l", lwd=0.4)
text(1.03, (spbreaks[i] + spbreaks[i+1]) / 2, xpd=T, pos=4, cex=0.8,
labels=paste(samdat$Genus[which(duplicated(samdat$Species)==F)][i], "\n",
samdat$Species[which(duplicated(samdat$Species)==F)][i], sep=""))
}
}
par(mfrow=c(1,1), mar=c(2, 1.5, 2, 10), lwd=0.1, cex=0.7, xpd=NA)
# Plot composition of 97% within-sample OTUs colored by clade
composition(phy97.f.p, col=taxcolors[factor(data.frame(tax_table(phy97.f.p))[order(data.frame(tax_table(phy97.f.p))$Subtype), ]$Clade, levels=c("A","B","C","D","F","G"))], legend=T)
composition(phy97.f.p, col=rainbow(ntaxa(phy97.f.p)), legend=F)
For each coral species, barplots are presented showing the relative abundance of OTUs obtained by 100% and 97%-within-sample clustering. OTUs comprising more than 4% of a sample are labeled with the unique OTU number and the Symbiodinium subtype and NCBI GenBank accession number of the closest BLAST hit for that OTU in the reference database. OTU numbers and barplot colors are NOT comparable across clustering methods.
source("R/functions.R")
# Create subsetted phyloseq objects for Pseudodiploria strigosa
pstr97.f.p <- subset_samples(phy97.f.p, Species=="Pstr")
pstr100.f.p <- subset_samples(phy100.f.p, Species=="Pstr")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(pstr97.f.p, main="97% OTUs")
otubarplot(pstr100.f.p, main="100% OTUs")
pstr.net <- makenet(pstr97.f.p, 0)
set.seed(54538)
plotnet(pstr.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
ssid97.f.p <- subset_samples(phy97.f.p, Species=="Ssid")
ssid100.f.p <- subset_samples(phy100.f.p, Species=="Ssid")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ssid97.f.p, main="97% OTUs")
otubarplot(ssid100.f.p, main="100% OTUs")
ssid.net <- makenet(ssid97.f.p, 0)
set.seed(54538)
plotnet(ssid.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
dcyl97.f.p <- subset_samples(phy97.f.p, Species=="Dcyl")
dcyl100.f.p <- subset_samples(phy100.f.p, Species=="Dcyl")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dcyl97.f.p, main="97% OTUs")
otubarplot(dcyl100.f.p, main="100% OTUs")
dcyl.net <- makenet(dcyl97.f.p, 0)
set.seed(54538)
plotnet(dcyl.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
ffra97.f.p <- subset_samples(phy97.f.p, Species=="Ffra")
ffra100.f.p <- subset_samples(phy100.f.p, Species=="Ffra")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ffra97.f.p, main="97% OTUs")
otubarplot(ffra100.f.p, main="100% OTUs")
ffra.net <- makenet(ffra97.f.p, 0)
set.seed(54538)
plotnet(ffra.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
mcav97.f.p <- subset_samples(phy97.f.p, Species=="Mcav")
mcav100.f.p <- subset_samples(phy100.f.p, Species=="Mcav")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mcav97.f.p, main="97% OTUs")
otubarplot(mcav100.f.p, main="100% OTUs")
mcav.net <- makenet(mcav97.f.p, 0)
set.seed(54538)
plotnet(mcav.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
mmir97.f.p <- subset_samples(phy97.f.p, Species=="Mmir")
mmir100.f.p <- subset_samples(phy100.f.p, Species=="Mmir")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mmir97.f.p, main="97% OTUs")
otubarplot(mmir100.f.p, main="100% OTUs")
mmir.net <- makenet(mmir97.f.p, 0)
set.seed(54538)
plotnet(mmir.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
oann97.f.p <- subset_samples(phy97.f.p, Species=="Oann")
oann100.f.p <- subset_samples(phy100.f.p, Species=="Oann")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(oann97.f.p, main="97% OTUs")
otubarplot(oann100.f.p, main="100% OTUs")
oann.net <- makenet(oann97.f.p, 0)
set.seed(54538)
plotnet(oann.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
past97.f.p <- subset_samples(phy97.f.p, Species=="Past")
past100.f.p <- subset_samples(phy100.f.p, Species=="Past")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(past97.f.p, main="97% OTUs")
otubarplot(past100.f.p, main="100% OTUs")
past.net <- makenet(past97.f.p, 0)
set.seed(54538)
plotnet(past.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
srad97.f.p <- subset_samples(phy97.f.p, Species=="Srad")
srad100.f.p <- subset_samples(phy100.f.p, Species=="Srad")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(srad97.f.p, main="97% OTUs")
otubarplot(srad100.f.p, main="100% OTUs")
srad.net <- makenet(srad97.f.p, 0)
set.seed(54538)
plotnet(srad.net)
# Create subsetted phyloseq objects for Pseudodiploria strigosa
watr97.f.p <- subset_samples(phy97.f.p, Species=="Water")
watr100.f.p <- subset_samples(phy100.f.p, Species=="Water")
# Plot custom barplots for Pseudodiploria strigosa
par(mfrow=c(2,1), mar=c(4,4,2,2), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(watr97.f.p, main="97% OTUs")
otubarplot(watr100.f.p, main="100% OTUs")
watr.net <- makenet(watr97.f.p, 0)
set.seed(54538)
plotnet(watr.net)
plot_richness(subset_samples(phy97.f, Species=="Water"), x="Location")
## Warning: Removed 245 rows containing missing values (geom_errorbar).
plot_richness(subset_samples(phy100.f, Species=="Water"), x="Location")
## Warning: Removed 225 rows containing missing values (geom_errorbar).